//Name: Jim Wyckoff
//Purpose: Examine NAEP scores over time
//Date: 11/14/23 updated through 7/25/25

/*
Mapping of Figures and Tables to code
Figure	Line of code
 1		166
 2		315
 3		337
 4		441
 5		483
 6		produced in excell
 A1		170-200
 A2		308-329
 A3		From TIMSS
 A4		347
 A5		209
 
Table	Line of code
 1		423
 A1		426
 A2		304
 A3		432
 A4		436
 A5  	356

*/

*****
** 	1. 	Set preferences
*****
	clear
	capture log close
	set more off, perm
	global sysdate = c(current_date)  /* do this at start so we can use it for logfile names, etc. */
	global sysdate=subinstr("$sysdate"," ","",.)  /* remove all spaces */

* setup log and define various directories:
	global drive c:  /*for harddisk folders*/
	global dir "${drive}\Users\jhw4n\Box\01 Research\NAEP"
	global logdir "${dir}\logfiles"
	global indir "${dir}\inputfiles"
	global indirfin "${drive}\Users\jhw4n\Box\01 Research\SchoolFunding\state\inputfiles"
	global indirfin "${drive}\Users\jhw4n\Box\School Finance\Analysis\inputfiles\finance"
	global outdir "${dir}\output"
	global progdir "${dir}\programs"
	graph set window fontface "Times New Roman"
  cd "${dir}" 


***********************************************************************************************************
 *** NAEP 4th and 8th grade reading and math data for the US and states downloaded from the NAEP Data Explorer 1/30/25
 *** Read in NAEP Percentile data for US and states
	import excel using "$indir\Math Grade 4", firstrow clear 
	rename Year year
	rename Jurisdiction state
	rename Averagescalescore mean 
	rename thpercentile p10
	rename F p25
	rename G p50
	rename H p75 
	rename I p90 
	drop Allstudents
	destring p10, replace force
	destring p25, replace force
	destring p50, replace force
	destring p75, replace force
	destring p90, replace force 
	destring year, replace force
	destring mean, replace force 
	gen test="M4"
	save "$indir\NAEPMath4_28Jan2025", replace 
	
	import excel using "$indir\Math Grade 8", firstrow clear 
	rename Year year
	rename Jurisdiction state
	rename Averagescalescore mean 
	rename thpercentile p10
	rename F p25
	rename G p50
	rename H p75 
	rename I p90 
	drop Allstudents
	destring p10, replace force
	destring p25, replace force
	destring p50, replace force
	destring p75, replace force
	destring p90, replace force 
	destring year, replace force
	destring mean, replace force 
	gen test="M8"
	save "$indir\NAEPMath8_28Jan2025", replace 
	
	import excel using "$indir\Reading Grade 4", firstrow clear 
	rename Year year
	rename Jurisdiction state
	rename Averagescalescore mean 
	rename thpercentile p10
	rename F p25
	rename G p50
	rename H p75 
	rename I p90 
	drop Allstudents
	destring p10, replace force
	destring p25, replace force
	destring p50, replace force
	destring p75, replace force
	destring p90, replace force 
	destring year, replace force
	destring mean, replace force 
	gen test="R4"
	save "$indir\NAEPRead4_28Jan2025", replace 
	
	import excel using "$indir\Reading Grade 8", firstrow clear 
	rename Year year
	rename Jurisdiction state
	rename Averagescalescore mean 
	rename thpercentile p10
	rename F p25
	rename G p50
	rename H p75 
	rename I p90 
	drop Allstudents
	destring p10, replace force
	destring p25, replace force
	destring p50, replace force
	destring p75, replace force
	destring p90, replace force 
	destring year, replace force
	destring mean, replace force 
	gen test="R8"
	save "$indir\NAEPRead8_28Jan2025", replace 

	append using "$indir\NAEPMath4_28Jan2025" "$indir\NAEPRead4_28Jan2025" ///
		"$indir\NAEPMath8_28Jan2025" 
	save "$indir\NAEP29Jan2025", replace 
	
*** Drop years with missing data and DoDEA, District of Columbia & Puerto Rico as states	
	use "$indir\NAEP29Jan2025", clear
	drop if year==. 
	drop if state=="DoDEA" | state=="Puerto Rico" | state=="District of Columbia"
	statastates, name(state) nogen
	replace state=strproper(state) 
	rename state_abbrev stateabb
	drop state_fips 
	replace stateabb="US" if state=="National Public"
	
*** Create differences by test and year with values for 2009		
	foreach var of varlist p10 p25 p50 p75 p90 {	
	gen `var'_2009 = `var' if year==2009
	bys stateabb test: egen `var'09=max(`var'_2009)
	gen `var'_I=(`var'/`var'09)*100
	gen `var'_D09=(`var'-`var'09)
	}
	
*** Create differences by test and year with values for 2013			
	foreach var of varlist p10 p25 p50 p75 p90 {	
	gen `var'_2013 = `var' if year==2013
	bys stateabb test: egen `var'13=max(`var'_2013)
	gen `var'_I13=(`var'/`var'13)*100
	gen `var'_D13=(`var'-`var'13)
	}
	
	tabstat p10 p25 p50 p75 p90 p10_D13 p25_D13 p50_D13 p75_D13 p90_D13 if year==2019 & stateabb=="US", by(test) 
	
	***************************************************************************************
	***** Produce US indexed Figure 1 and A1
	***************************************************************************************
	*	4th grade math	
	twoway (line p50_I13 p10_I13 p90_I13 year if state=="National Public" & test=="M4", /// 
	sort lpattern(solid longdash shortdash)lwidth(medthick)), ///
	title("US NAEP 4th Grade Math") /// 
	subtitle("Median, 90th, & 10th Percentiles")  ///
	legend(label(1 "Median") ///
	label(2 "10th Percentile") label(3 "90th Percentile") size(small)) ///  
	xlabel(1992(4)2024) xline(2013) yscale(range(90 105)) ylabel(90(5)105) ytitle(NAEP Scores) ///
	note("(NAEP Composite Scores Indexed to 2013)")
	
	*	4th grade reading	
	twoway (line p50_I13 p10_I13 p90_I13 year if state=="National Public" & test=="R4", /// 
	sort lpattern(solid longdash shortdash)lwidth(medthick)), ///
	title("US NAEP 4th Grade Reading") /// 
	subtitle("Median, 90th, & 10th Percentiles")  ///
	legend(label(1 "Median") ///
	label(2 "10th Percentile") label(3 "90th Percentile") size(small)) ///  
	xlabel(1992(4)2024) xline(2013) yscale(range(90 105)) ylabel(90(5)105) ytitle(NAEP Scores) ///
	note("(NAEP Composite Scores Indexed to 2009)")
	
	***	Figure 1 8th grade math	
	twoway (line p50_I13 p10_I13 p90_I13 year if state=="National Public" & test=="M8", /// 
	sort lpattern(solid longdash shortdash)lwidth(medthick)), ///
	title("Figure 1. US NAEP 8th Grade Math") /// 
	subtitle("Median, 90th, & 10th Percentiles")  ///
	legend(label(1 "Median") ///
	label(2 "10th Percentile") label(3 "90th Percentile") size(small)) ///  
	xlabel(1992(4)2024) xline(2013) yscale(range(90 105)) ylabel(90(5)105) ytitle(NAEP Scores) 
	
	note("(NAEP Composite Scores Indexed to 2013)")
	
	*8th grade Read 
	twoway (line p50_I13 p10_I13 p90_I13 year if state=="National Public" & test=="R8", /// 
	sort lpattern(solid longdash shortdash)lwidth(medthick)), ///
	title("US NAEP 8th Grade Reading") /// 
	subtitle("Median, 90th, & 10th Percentiles")  ///
	legend(label(1 "Median") ///
	label(2 "10th Percentile") label(3 "90th Percentile") size(small)) ///  
	xlabel(1992(4)2024) xline(2013) yscale(range(90 105)) ylabel(90(5)105) ytitle(NAEP Scores) ///
	note("(NAEP Composite Scores Indexed to 2013)")

*** Figure A5 Mississippi, US graphs 
	line p10 year if stateabb=="US" & test=="R4", /// 
	sort lpattern(longdash) lwidth(medthick) lcolor(navy), /// 
	|| line p10 year if stateabb=="MS" & test=="R4", /// 
	sort lpattern(solid) lwidth(medthick) lcolor(cranberry), /// 
	||, title("NAEP 4th Grade Reading") /// 
	subtitle("10th Percentile") legend(label(1 "US") label(2 "MS") pos(6) ring(0) /// 
	size(small)) ///  
	xlabel(1992 2000 2009 2013 2019 2024) xline(2009 2013 2019)  ytitle(NAEP Scores) 

	note("(NAEP Composite Scores)")
	
	line p10 year if stateabb=="US" & test=="R8", /// 
	sort lpattern(longdash) lwidth(medthick) lcolor(navy), /// 
	|| line p10 year if stateabb=="MS" & test=="R8", /// 
	sort lpattern(solid) lwidth(medthick) lcolor(cranberry), /// 
	||, title("NAEP 8th Grade Reading") /// 
	subtitle("10th Percentile") legend(label(1 "US") label(2 "MS") pos(6) ring(0) /// 
	size(small)) ///  
	xlabel(1992 2000 2009 2013 2019 2024) xline(2009 2013 2019) ylabel(200(5)225) ytitle(NAEP Scores) 
	
	note("(NAEP Composite Scores)")
	
	line p10 year if stateabb=="US" & test=="M4", /// 
	sort lpattern(longdash) lwidth(medthick) lcolor(navy), /// 
	|| line p10 year if stateabb=="MS" & test=="M4", /// 
	sort lpattern(solid) lwidth(medthick) lcolor(cranberry), /// 
	||, title("NAEP 4th Grade Math") /// 
	subtitle("10th Percentile") legend(label(1 "US") label(2 "MS") pos(6) ring(0) /// 
	size(small)) ///  
	xlabel(1992 2000 2009 2013 2019 2024) xline(2009 2013 2019)  ytitle(NAEP Scores) ///
	note("(NAEP Composite Scores)")
	
	line p10 year if stateabb=="US" & test=="M8", /// 
	sort lpattern(longdash) lwidth(medthick) lcolor(navy), /// 
	|| line p10 year if stateabb=="MS" & test=="M8", /// 
	sort lpattern(solid) lwidth(medthick) lcolor(cranberry), /// 
	||, title("NAEP 8th Grade Math") /// 
	subtitle("10th Percentile") legend(label(1 "US") label(2 "MS") pos(6) ring(0) /// 
	size(small)) ///  
	xlabel(1992 2000 2009 2013 2019 2024) xline(2009 2013 2019) ytitle(NAEP Scores) ///
	note("(NAEP Composite Scores)")
		
	
*** Produce US projected graphs 
  * create predicted scores for 1992-2009 that run through y=2009 various percentiles 
	gen yearz=year-2009
	foreach var of varlist p10 p25 p50 p75 p90{
	gen `var'dm=`var' - `var'09 
	gen `var'p=. 
	}	
	
	levelsof test, local(tst)
	foreach var of varlist p10 p25 p50 p75 p90{
	foreach t of local tst{		
	reg `var'dm yearz if test=="`t'" & year <= 2009 & stateabb=="US", noconstant
	predict `var'pdm if test=="`t'" & year <= 2019 & stateabb=="US"
	replace `var'p=`var'pdm + `var'09 if stateabb=="US" & test=="`t'"
	drop `var'pdm
	}
	}
		
	tabstat p10 p10p p25 p25p p50 p50p p75 p75p p90 p90p if stateabb=="US" & year==2019, by(test)

* compute change in SD units using NAEP student achievement standard deviations based on 2019 reported results
	gen sd=32 if test=="M4"
	replace sd=40 if test=="M8" 
	replace sd=39 if test=="R4" 
	replace sd=38 if test=="R8" 
	
	gen p10pdif=(p10-p10p)/sd
	gen p25pdif=(p25-p25p)/sd
	gen p50pdif=(p50-p50p)/sd
	gen p75pdif=(p75-p75p)/sd
	gen p90pdif=(p90-p90p)/sd
	
	save "$indir\temp", replace   
	use "$indir\temp", clear 
	
* create predicted scores for 2019-24 based on 2013-19 that run through y=2013 various percentiles 
	gen yeary=year-2013
	foreach var of varlist p10 p50 p90{
	gen `var'13dm=`var' - `var'13 
	gen `var'p13=. 
	}	
	
	levelsof test, local(tst)
	foreach var of varlist p10 p50 p90{
	foreach t of local tst{		
	reg `var'13dm yeary if test=="`t'" & year >= 2013 & year <=2019 & stateabb=="US", noconstant
	predict `var'p13dm if test=="`t'" & year >= 2013 & stateabb=="US"
	replace `var'p13=`var'p13dm + `var'13 if stateabb=="US" & test=="`t'"
	drop `var'p13dm
	}
	}
			
*** Table A2
	tabstat p10pdif p25pdif p50pdif p75pdif p90pdif if stateabb=="US" & year==2019, by(test)

		
*** Figure 2 and Appendix Figure A2
	twoway (line p10 p10p year if test=="M4" & stateabb=="US", /// 
	sort lpattern(longdash shortdash) lcolor(cranberry navy) lwidth(medthick)), ///
	title("US NAEP 4th Grade Math") sub("10th Percentile") /// 
	legend(label(1 "Actual") label(2 "Predicted") pos(6) ring(0)) ///   
	xlabel(1996 2002 2009 2019) xline(2009 2019, lwidth(thin)) ///
	yscale(range(175 215)) ylabel(175(10)215) ytitle(NAEP Scores)
		
	twoway (line p10 p10p year if test=="M8" & stateabb=="US", /// 
	sort lpattern(longdash shortdash) lcolor(cranberry navy) lwidth(medthick)), ///
	title("Figue 2. US NAEP 8th Grade Math") sub("10th Percentile") /// 
	legend(label(1 "Actual") label(2 "Predicted") pos(6) ring(0)) ///   
	xlabel(1996 2002 2009 2019) xline(2009 2019, lwidth(thin)) ///
	yscale(range(210 240)) ylabel(210(10)240) ytitle(NAEP Scores)
	
	twoway (line p10 p10p year if test=="R4" & stateabb=="US", /// 
	sort lpattern(longdash shortdash) lcolor(cranberry navy) lwidth(medthick)), ///
	title("US NAEP 4th Grade Reading") sub("10th Percentile") /// 
	legend(label(1 "Actual") label(2 "Predicted") pos(6) ring(0)) ///   
	xlabel(1996 2002 2009 2019) xline(2009 2019, lwidth(thin)) ///
	yscale(range(150 180)) ylabel(150(10)180) ytitle(NAEP Scores)
	
	twoway (line p10 p10p year if test=="R8" & stateabb=="US", /// 
	sort lpattern(longdash shortdash) lcolor(cranberry navy) lwidth(medthick)), ///
	title("US NAEP 8th Grade Reading") sub("10th Percentile") /// 
	legend(label(1 "Actual") label(2 "Predicted") pos(6) ring(0)) ///   
	xlabel(1996 2002 2009 2019) xline(2009 2019, lwidth(thin)) ///
	yscale(range(200 230)) ylabel(200(10)230) ytitle(NAEP Scores)
	
*** Figure 3	
	twoway (line p10 p10p13 year if test=="M8" & stateabb=="US", /// 
	sort lpattern(longdash shortdash) lcolor(cranberry navy) lwidth(medthick)), ///
	title("Figure 3. US NAEP 8th Grade Math") sub("10th Percentile") /// 
	legend(label(1 "Actual") label(2 "Predicted") pos(6) ring(0)) ///   
	xlabel(2013 2019 2024) xline(2013 2019, lwidth(thin)) ///
	yscale(range(210 240)) ylabel(210(10)240) ytitle(NAEP Scores)
	
	tabstat p10 p10p13 if test=="M8" & stateabb=="US", by(year)
		
	
*** Data for Figure A4 Graph produced in Excel 	
	sort p10_D09
	list stateabb p10_D09 if test=="M8" & year==2019 

*** produce table year when greatest score was reached during 2000 to 2019 
	set type double 
	bysort state test (year): egen p10max=max(p10) if year>=2000 & year <=2019 
	gen p10yr=year if p10==p10max & p10max != .
	bysort state test (year): egen p10yrmax=max(p10yr) if year>=2000 & year <=2019 
	drop p10yr
** Table A5 		
	table p10yrmax test  if year==2019, stat(frequency) 
		
	save "$indir\NAEP15Apr2025", replace  
			
	*******************************************************************
	*** Produce period summaries
	*******************************************************************
	
	use "$indir\NAEP15Apr2025", clear
	keep year stateabb test mean p10 p25 p50 p75 p90 sd 
	
	foreach var of varlist mean p10 p25 p50 p75 p90 {
	rename `var' `var'_
	}
		
	reshape wide mean p10 p25 p50 p75 p90, i(state test) j(year)
	
	*** calculate the changes by percentile and test across periods for Table 1
	local per p10 p25 p50 p75 p90  
	foreach p of local per{
	gen `p'_9200=(`p'_2000-`p'_1992)/8 if test=="M4" | test=="M8"
	gen `p'_0007=(`p'_2007-`p'_2000)/7 if test=="M4" | test=="M8"
	replace `p'_9200=(`p'_1998-`p'_1992)/6 if test=="R4" | test=="R8"
	replace `p'_0007=(`p'_2007-`p'_1998)/9 if test=="R4" | test=="R8"	
	gen `p'_0713=(`p'_2013-`p'_2007)/6 
	gen `p'_1319=(`p'_2019-`p'_2013)/6 	
	gen `p'_1924=(`p'_2024-`p'_2019)/5 
	replace `p'_0007=(`p'_2007-`p'_2003)/4 if `p'_0007==. & (test=="R4" | test=="R8")
	} 
	
	local per p10 p50 p90 
	foreach p of local per{
	gen `p'_9213sd=.
	gen `p'_1319sd=.
	gen `p'_1924sd=.
		}
	
	local per p10 p50 p90
	levelsof test, local(tst)
	foreach p of local per{
		foreach t of local tst{	
	replace `p'_9213sd=(`p'_2013-`p'_1992)/sd if test=="`t'" 
	replace `p'_1319sd=(`p'_2019-`p'_2013)/sd if test=="`t'" 
	replace `p'_1924sd=(`p'_2024-`p'_2019)/sd if test=="`t'" 
		}
		replace `p'_9213sd=(`p'_2013-`p'_1998)/sd if test=="R8"
	}
		
	save "$indir\NAEP22Mar2025", replace 
	
	use "$indir\NAEP22Mar2025", clear 
	
	* predict 2019-2024 learning loss based on 2013-2019
	gen p10_1324ratio=.
	gen p50_1324ratio=.
	gen p90_1324ratio=.
	
	local per p10 p50 p90
	levelsof test, local(tst)
	foreach p of local per{
		foreach t of local tst{	
	replace `p'_1324ratio=(`p'_1319/`p'_1924) if test=="`t'" 
		}
	}
		
	**************** Produce National level tables *****************************************	
	*** Table 1. 
	tabstat p10_9200 p10_0007 p10_0713 p10_1319 p10_1924 if stateabb=="US", by(test) 
	
	*** Table A1
	tabstat p*_9213sd p*_1319sd p*_1924sd if stateabb=="US", by(test) 
			
	tabstat p10_1319 p25_1319 p50_1319 p75_1319 p90_1319 if stateabb=="US", by(test)  
	tabstat p10 p50 p90 if stateabb=="US" & test=="M8", by(year) 
		
	*** Table A3 Predicted 2019-24 loss based on 2013-19 	
	tabstat p10_1319 p10_1924 p10_1324ratio p50_1319 p50_1924 p50_1324ratio p90_1319 p90_1924 p90_1324ratio /// 
	if stateabb=="US", by(test) 
	
	*** Table A4. 
	tabstat p10_1319sd p50_1319sd p90_1319sd if stateabb != "US", by(test) stat(p10 p50 p90) 
	tabstat p10_1319sd p10_1924sd, by(test) 
		
	
	**** Figure 4 histogram 
	hist p10_1319sd if test=="M8", bins(12) percent color(navy) ///
		title("Figure 4. State Changes 2013-19 NAEP 8th Grade Math (SD)") ///
		xtitle("Change 2013-19(SD)")
				
	gen p10_0919sd=(p10_2019-p10_2009)/sd
	
	gen p10_9207sd=(p10_2007-p10_1992)/sd
	gen p10_0719sd=(p10_2019-p10_2007)/sd 
	
	scatter p10_2009 p10_0919sd if test=="M8", /// 
		title("10th Perecentile NAEP 8th Grade Math Scores by State") /// 
		xtitle("Change 2009-19 (SD)") ytitle("Scale Score 2009") xline(-7.51) yline(8.67) /// 
		lpattern(dot) /// 
		mlabel(stateabb) mcolor(navy) mlabcolor(dknavy) msize(small) mlabsize(small) ///
		note("(Dashed lines are mean values. Standard Deviation = 40)")
			
	
	************************************************************************************
	* Pull in State level demographics from SEDA data and School Finance data from the F-33 Census file
	************************************************************************************
	use  "$indir\NAEP07Jan2025", clear 	
		
	* merge SEDA covariate data
	merge m:1 year stateabb using ///
		"${drive}\Users\jhw4n\Box\01 Research\SEDA\SEDA5.0\inputfiles\seda_cov_state_5.0short.dta"
	drop _merge 
	save "$indir\NAEPseda07Jan2025", replace
	
	** School finance data for states downloaded from https://nces.ed.gov/ccd/files.asp#Fiscal:1,Page:1 at various times 
	** depending on year between 2019 and 2024. To allow comparisons over time, data were indexed for the Consumer Price Index
	
	use "${drive}\Users\jhw4n\Box\01 Research\SchoolFunding\state\inputfiles\fy01_21_statecpi.dta", clear 
	drop if year < 2002 | year==2004 | year==2008
	drop if state==""
	foreach var of varlist rpprev_loc_total rpprev_state_total rpprev_fed_total rpprev_total rppexp_current /// 
		rppexp_inst_sal rppexp_inst_total rppexp_educ_total {	
	gen `var'_2009 = `var' if year==2009
	bys fipst: egen `var'09=max(`var'_2009)
	gen `var'_D09=(`var'-`var'09)
	}
	
	** Figue 5 
	hist rpprev_total_D09 if year==2019 & stateabb !="US",  bins(15) percent color(navy) ///
		title("Figure 5. 2009-2019 Change Revenue per Pupil States") xlabel(-5000(1000)5000) ///
		xtitle(State change total revenue 2009-2019) 
		
		note("Based on F-33 School Finance Data Cencus Bureau")
	
	
	
	
	